Setup

Carregar llibreries

library(plyr)
library(ggplot2)
library(rstudioapi)

Funcions

# Calcular la mitja Circular del vector X amb els noms dels angles
circular.mean <- function(X) {
  x <- mean(cos(as.numeric(names(X))) * X)
  y <- mean(sin(as.numeric(names(X))) * X)
  n <- sum(X)
  theta.mean <- atan2(y, x)
  if(theta.mean < 0)
    theta.mean = theta.mean + 2 * pi
  theta.var = sqrt(x^2 + y^2) / n
  return(c(theta.mean, theta.var))
}

# Calcular la mitja donat el vector de dades (X) la quantitat per cada classe (Y) i el nou ordre (I)
resample.means <- function(X, Y, I) {
  n <- length(Y)
  Y.means <- numeric(n)
  X.index = 0
  for(i in 1:n) {
    Y.means[i] <- mean(X[I[(X.index + 1):(X.index + Y[i])]])
    X.index = X.index + Y[i]
  }
  names(Y.means) <- names(Y)
  return(Y.means)
}

Carregar dades

# Agafar directori del fitxer
path = dirname(rstudioapi::getSourceEditorContext()$path)
ds.og.file = paste(path, "/Accidents_Trafic_Catalunya.csv", sep = '')
ds.file =  paste(path, "/ATC.csv", sep = '')
update = F

if(update || !file.exists(ds.file)) {
  dataset <- read.csv(ds.og.file)
  # Corregir l'hora
  for(i in 1:nrow(dataset)) {
    di <- dataset[i,]
    if(di$grupHor != 'Nit') {
      if(di$hor < 600) {
        if(di$hor < 60)
          di$hor = di$hor * 100
        else
          di$hor = di$hor * 10
      }
    } else {
      if(di$hor < 60 || (220 <= di$hor && di$hor < 240)) {
        if(di$hor < 6 || (21 < di$hor && di$hor < 24))
            di$hor = di$hor * 100
        else if(0 < di$hor%%10 && di$hor%%10 < 5)
          di$hor = di$hor * 10
      }
    }
    dataset$hor[i] = di$hor%/%100
    dataset$min[i] = di$hor%%100
  }
  # Passar a data i extreure els factors importanta
  dataset$data = as.Date(dataset$dat, '%d/%m/%Y')
  dataset$diaSem = factor(weekdays(dataset$data))
  dataset$mes = factor(months(dataset$data))
  # Eliminar velocitats falses
  dataset$C_VELOCITAT_VIA[dataset$C_VELOCITAT_VIA == 999] = NA
  dataset$C_VELOCITAT_VIA[dataset$C_VELOCITAT_VIA == 99] = NA
  dataset$C_VELOCITAT_VIA[dataset$C_VELOCITAT_VIA == 0] = NA
  # Guardar fitxer corretgit
  write.csv(dataset, file = ds.file)
} else {
  # Carrregar fitxer corregit
  dataset <- read.csv(ds.file)
  dataset$data = as.Date(dataset$dat, '%d/%m/%Y')
}

Moment

Hora del dia

# Vectors rellevants
nom.hor <- format(ISOdate(0,1,1, 0:23),"%Hh")
color.grupHor <- c(rep('darkblue', 6), rep('yellow', 8), rep('orange', 8), rep('darkblue', 2))
val.hor <- 0:24

# DataFrame de valor importants
d.hora.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$hor, mean)[val.hor],
                        morts = tapply(dataset$F_MORTS, dataset$hor, mean)[val.hor],
                        n = tapply(dataset$F_VICTIMES, dataset$hor, length)[val.hor],
                        nom = nom.hor,
                        color = color.grupHor)

# Plots de les dades
ggplot(data = d.hora.VM, aes(x = nom, y = n), col) +
  geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.hora.VM$color, color = 'black') + 
  ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Hora") +
  coord_polar()

ggplot(data = d.hora.VM, aes(x = nom, y = victimes), col) +
  geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.hora.VM$color, color = 'black') + 
  ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Hora") +
  coord_polar()

ggplot(data = d.hora.VM, aes(x = nom, y = morts), col) +
  geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.hora.VM$color, color = 'black') + 
  ggtitle("Morts per accident") + ylab("Mitja") + xlab("Hora") +
  coord_polar()

Analisi

# Conjunt de dades amb l'hora i l'angle
d.hora.all <- data.frame(victimes = dataset$F_VICTIMES,
                         morts = dataset$F_MORTS,
                         hora = dataset$hor,
                         theta = dataset$hor * pi / 12)

# Resultats de la mitja circular
d.hora.V <- circular.mean(tapply(d.hora.all$victimes, d.hora.all$theta, mean))
d.hora.M <- circular.mean(tapply(d.hora.all$morts, d.hora.all$theta, mean))

d.hora.theta.V <- d.hora.V[1]
d.hora.var.V <- d.hora.V[2]
d.hora.theta.M <- d.hora.M[1]
d.hora.var.M <- d.hora.M[2]

# Preparació pel test permutacional
n.rep <- 10000
n.set <- nrow(d.hora.all)
d.hora.rep <- tapply(d.hora.all$hora, d.hora.all$theta, length)

p.hora.theta.V <- numeric(n.rep)
p.hora.var.V <- numeric(n.rep)
p.hora.theta.M <- numeric(n.rep)
p.hora.var.M <- numeric(n.rep)

# Fer les permutacions
for(i in 1:n.rep) {
  perm <- sample(n.set)
  p.hora.V <- circular.mean(resample.means(d.hora.all$victimes, d.hora.rep ,perm))
  p.hora.M <- circular.mean(resample.means(d.hora.all$morts, d.hora.rep, perm))
  p.hora.theta.V[i] <- p.hora.V[1]
  p.hora.var.V[i] <- p.hora.V[2]
  p.hora.theta.M[i] <- p.hora.M[1]
  p.hora.var.M[i] <- p.hora.M[2]
}

# Plot dels resultats (Victimes)
plot.hora.limit.V <- max(c(p.hora.var.V, d.hora.var.V * 1.1))
hist(p.hora.var.V, xlim = c(0, plot.hora.limit.V), main = '"Variancia" de l\'hora per victimes')
p.value.hora.V <- sum(p.hora.var.V >= d.hora.var.V) / n.rep
lines(rep(d.hora.var.V, 2), c(0, n.rep), col = 'red')

# Plot dels resultats (Morts)
plot.hora.limit.M <- max(c(p.hora.var.M, d.hora.var.M * 1.1))
hist(p.hora.var.M, xlim = c(0, plot.hora.limit.M), main = '"Variancia" de l\'hora per morts')
p.value.hora.M <- sum(p.hora.var.M >= d.hora.var.M) / n.rep
lines(rep(d.hora.var.M, 2), c(0, n.rep), col = 'red')

# Mostra les dades
{
  cat('Víctimes p-value:', p.value.hora.V, '\n')
  cat('Morts p-value:', p.value.hora.M, '\n')
  cat('Víctimes mitja circular:', d.hora.theta.V * 12 / pi, ', variància circular:', d.hora.var.V, '\n')
  cat('Morts mitja circular:', d.hora.theta.M * 12 / pi, ', variància circular:', d.hora.var.M, '\n')
}
## Víctimes p-value: 0 
## Morts p-value: 0 
## Víctimes mitja circular: 2.015411 , variància circular: 0.001635155 
## Morts mitja circular: 2.603732 , variància circular: 0.008263661
# Scatter plots de les permutacions (Victimes)
plot.hora.limitX.V <- c(min(p.hora.var.V * sin(p.hora.theta.V)), d.hora.var.V * sin(d.hora.theta.V) * 1.1)
plot.hora.limitY.V <- c(min(p.hora.var.V * cos(p.hora.theta.V)), d.hora.var.V * cos(d.hora.theta.V) * 1.1)

plot(p.hora.var.V * sin(p.hora.theta.V), p.hora.var.V * cos(p.hora.theta.V),
     asp = 1, xlim = plot.hora.limitX.V, ylim = plot.hora.limitY.V, col = 'blue', lwd = .1, cex = .3,
     xlab = '', ylab = '', main = 'scatter plot de les permutacions per victimes')
points(d.hora.var.V * sin(d.hora.theta.V), d.hora.var.V * cos(d.hora.theta.V),
       col = 'red', cex = .5)

# Scatter plots de les permutacions (Morts)
plot.hora.limitX.M <- c(min(p.hora.var.M * sin(p.hora.theta.M)), d.hora.var.M * sin(d.hora.theta.M) * 1.1)
plot.hora.limitY.M <- c(min(p.hora.var.M * cos(p.hora.theta.M)), d.hora.var.M * cos(d.hora.theta.M) * 1.1)

plot(p.hora.var.M * sin(p.hora.theta.M), p.hora.var.M * cos(p.hora.theta.M),
     asp = 1, xlim = plot.hora.limitX.M, ylim = plot.hora.limitY.M, col = 'blue', lwd = .1, cex = .3,
     xlab = '', ylab = '', main = 'scatter plot de les permutacions per morts')
points(d.hora.var.M * sin(d.hora.theta.M), d.hora.var.M * cos(d.hora.theta.M),
       col = 'red', cex = .5)

Dia de la setmana

# Vectors rellevants
nom.diaSem <- weekdays(as.Date('24/05/2021', '%d/%m/%Y')+0:6)
nom.tipusDia <- c(rep('laborable', 5), rep('fi de setmana', 2))
color.feiner <- c(rep('red', 5), rep('blue', 2))

# DataFrame de les dades importants
d.dia.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$diaSem, mean)[nom.diaSem],
                       morts = tapply(dataset$F_MORTS, dataset$diaSem, mean)[nom.diaSem],
                       n = tapply(dataset$F_VICTIMES, dataset$diaSem, length)[nom.diaSem],
                       nom = factor(nom.diaSem, levels = nom.diaSem),
                       color = color.feiner,
                       tipus = factor(nom.tipusDia, levels = c('laborable', 'fi de setmana')))

# Plots de les dades
ggplot(data = d.dia.VM, aes(y = n, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = d.dia.VM$color, color = 'black') +
  ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Dia de la setmana")

ggplot(data = d.dia.VM, aes(y = victimes, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = d.dia.VM$color, color = 'black') +
  ggtitle("Victimes") + ylab("Mitja") + xlab("Dia de la setmana")

ggplot(data = d.dia.VM, aes(y = morts, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = d.dia.VM$color, color = 'black') +
  ggtitle("Morts") + ylab("Mitja") + xlab("Dia de la setmana")

Analisi

# agafar dades respecte el dia de la setmana
d.dia.all <- data.frame(victimes = dataset$F_VICTIMES,
                         morts = dataset$F_MORTS,
                         dia = factor(dataset$diaSem, levels = nom.diaSem),
                         tipus = factor(dataset$diaSem, levels = nom.diaSem))
levels(d.dia.all$tipus) <- nom.tipusDia

# Generem els models lineals per probar
d.dia.fit.lab.V <- lm(d.dia.all$victimes[d.dia.all$tipus == 'laborable']~
                      factor(d.dia.all$dia[d.dia.all$tipus == 'laborable']))
d.dia.fit.lab.M <- lm(d.dia.all$morts[d.dia.all$tipus == 'laborable']~
                      factor(d.dia.all$dia[d.dia.all$tipus == 'laborable']))

d.dia.fit.dia6.V <- lm(d.dia.all$victimes[d.dia.all$dia != nom.diaSem[7]]~
         factor(d.dia.all$tipus[d.dia.all$dia != nom.diaSem[7]]))
d.dia.fit.dia6.M <- lm(d.dia.all$morts[d.dia.all$dia != nom.diaSem[7]]~
         factor(d.dia.all$tipus[d.dia.all$dia != nom.diaSem[7]]))

d.dia.fit.fds.V <- lm(d.dia.all$victimes[d.dia.all$tipus == 'fi de setmana']~
                      factor(d.dia.all$dia[d.dia.all$tipus == 'fi de setmana']))
d.dia.fit.fds.M <- lm(d.dia.all$morts[d.dia.all$tipus == 'fi de setmana']~
                      factor(d.dia.all$dia[d.dia.all$tipus == 'fi de setmana']))

# Comparació amb test anova (aov)
{
  cat('p-value per victimes comparant dies laborables:',
      summary(aov(d.dia.fit.lab.V))[[1]]$`Pr(>F)`[1], '\n')
  cat('p-value per morts comparant dies laborables:',
      summary(aov(d.dia.fit.lab.M))[[1]]$`Pr(>F)`[1], '\n')
  cat('Tots els dies laborables són iguals\n\n')
  
  cat('p-value per victimes comparant dies laborables i dissabte:',
      summary(aov(d.dia.fit.dia6.V))[[1]]$`Pr(>F)`[1], '\n')
  cat('p-value per morts comparant dies laborables i dissabte:',
      summary(aov(d.dia.fit.dia6.M))[[1]]$`Pr(>F)`[1], '\n')
  cat('A dissabte el numero per accident augmenta (respecte els laborables)\n\n')
  
  cat('p-value per victimes comparant dissabte i diumenge:',
      summary(aov(d.dia.fit.fds.V))[[1]]$`Pr(>F)`[1], '\n')
  cat('p-value per morts comparant dies dissabte i diumenge:',
      summary(aov(d.dia.fit.fds.M))[[1]]$`Pr(>F)`[1], '\n')
  cat('A diumenge el numero de victimes accident augmenta signnificativament pero el de morts no (Respecte dissabte)\n\n')
}
## p-value per victimes comparant dies laborables: 0.4398894 
## p-value per morts comparant dies laborables: 0.961798 
## Tots els dies laborables són iguals
## 
## p-value per victimes comparant dies laborables i dissabte: 2.363101e-23 
## p-value per morts comparant dies laborables i dissabte: 0.00154988 
## A dissabte el numero per accident augmenta (respecte els laborables)
## 
## p-value per victimes comparant dissabte i diumenge: 0.002842418 
## p-value per morts comparant dies dissabte i diumenge: 0.2385609 
## A diumenge el numero de victimes accident augmenta signnificativament pero el de morts no (Respecte dissabte)

Epoca de l’any

# Vectors rellevants
nom.mes <- format(ISOdate(0,1:12,1), '%B')
color.estacio <- c(rep('cyan', 2), rep('green', 3), rep('yellow', 3), rep('orange', 3), 'cyan')

# DataFrame de les dades importants
d.mes.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$mes, mean)[nom.mes],
                        morts = tapply(dataset$F_MORTS, dataset$mes, mean)[nom.mes],
                        n = tapply(dataset$F_VICTIMES, dataset$mes, length)[nom.mes],
                        nom = factor(nom.mes, levels = nom.mes),
                        color = color.estacio)

# Plots de les dades
ggplot(data = d.mes.VM, aes(x = nom, y = n), col) +
  geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.mes.VM$color, color = 'black') + 
  ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Hora") +
  coord_polar()

ggplot(data = d.mes.VM, aes(x = nom, y = victimes), col) +
  geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.mes.VM$color, color = 'black') + 
  ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Hora") +
  coord_polar()

ggplot(data = d.mes.VM, aes(x = nom, y = morts), col) +
  geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.mes.VM$color, color = 'black') + 
  ggtitle("Morts per accident") + ylab("Mitja") + xlab("Hora") +
  coord_polar()

Analisi

# Conjunt de dades dels mesos
d.mes.all <- data.frame(victimes = dataset$F_VICTIMES,
                         morts = dataset$F_MORTS,
                         mes = dataset$mes,
                         theta = as.numeric(format(dataset$data, "%m")) * pi / 6)

# Calcul de la mitjana circular
d.mes.V <- circular.mean(tapply(d.mes.all$victimes, d.mes.all$theta, mean))
d.mes.M <- circular.mean(tapply(d.mes.all$morts, d.mes.all$theta, mean))

d.mes.theta.V <- d.mes.V[1]
d.mes.var.V <- d.mes.V[2]
d.mes.theta.M <- d.mes.M[1]
d.mes.var.M <- d.mes.M[2]

#Preparació pel test permutacional
n.rep <- 10000
n.set <- nrow(d.mes.all)
d.mes.rep <- tapply(d.mes.all$mes, d.mes.all$theta, length)

p.mes.theta.V <- numeric(n.rep)
p.mes.var.V <- numeric(n.rep)
p.mes.theta.M <- numeric(n.rep)
p.mes.var.M <- numeric(n.rep)

# Fer les permutacions
for(i in 1:n.rep) {
  perm <- sample(n.set)
  p.mes.V <- circular.mean(resample.means(d.mes.all$victimes, d.mes.rep ,perm))
  p.mes.M <- circular.mean(resample.means(d.mes.all$morts, d.mes.rep, perm))
  p.mes.theta.V[i] <- p.mes.V[1]
  p.mes.var.V[i] <- p.mes.V[2]
  p.mes.theta.M[i] <- p.mes.M[1]
  p.mes.var.M[i] <- p.mes.M[2]
}

# Plot variancia (Victimes)
plot.mes.limit.V <- max(c(p.mes.var.V, d.mes.var.V * 1.1))
hist(p.mes.var.V, xlim = c(0, plot.mes.limit.V), main = '"Variancia" de l\'mes per victimes')
p.value.mes.V <- sum(p.mes.var.V >= d.mes.var.V) / n.rep
lines(rep(d.mes.var.V, 2), c(0, n.rep), col = 'red')

# Plot variancia (Morts)
plot.mes.limit.M <- max(c(p.mes.var.M, d.mes.var.M * 1.1))
hist(p.mes.var.M, xlim = c(0, plot.mes.limit.M), main = '"Variancia" de l\'mes per morts')
p.value.mes.M <- sum(p.mes.var.M >= d.mes.var.M) / n.rep
lines(rep(d.mes.var.M, 2), c(0, n.rep), col = 'red')

# Mostra les dades
{
  cat('Víctimes p-value:', p.value.mes.V, '\n')
  cat('Morts p-value:', p.value.mes.M, '\n')
  cat('Víctimes mitja circular:', d.mes.theta.V * 6 / pi, ', variància circular:', d.mes.var.V, '\n')
  cat('Morts mitja circular:', d.mes.theta.M * 6 / pi, ', variància circular:', d.mes.var.M, '\n')
}
## Víctimes p-value: 0.0622 
## Morts p-value: 0.4254 
## Víctimes mitja circular: 8.007152 , variància circular: 0.0008382164 
## Morts mitja circular: 9.627163 , variància circular: 0.001744607
# Scatter plot de les permutacions (Victimes)
plot(p.mes.var.V * sin(p.mes.theta.V), p.mes.var.V * cos(p.mes.theta.V),
     asp = 1, col = 'blue', lwd = .1, cex = .3,
     xlab = '', ylab = '', main = 'scatter plot de les permutacions per victimes')
points(d.mes.var.V * sin(d.mes.theta.V), d.mes.var.V * cos(d.mes.theta.V),
       col = 'red', cex = .5)

# Scatter plot de les permutacions (Morts)
plot(p.mes.var.M * sin(p.mes.theta.M), p.mes.var.M * cos(p.mes.theta.M),
     asp = 1, col = 'blue', lwd = .1, cex = .3,
     xlab = '', ylab = '', main = 'scatter plot de les permutacions per morts')
points(d.mes.var.M * sin(d.mes.theta.M), d.mes.var.M * cos(d.mes.theta.M),
       col = 'red', cex = .5)

Lloc

Tipus de via

# Vectors rellevants
nom.via <- names(sort(tapply(dataset$F_VICTIMES, dataset$D_TIPUS_VIA, length), decreasing = TRUE))
nom.viaComp <- c('Via urbana', 'Carretera\nconvencional',
                 'Altres', 'Autovia', 'Autopista',
                 'Camí rural\nPista forestal')

# DataFrame de les dades importants
d.via.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$D_TIPUS_VIA, mean)[nom.via],
                       morts = tapply(dataset$F_MORTS, dataset$D_TIPUS_VIA, mean)[nom.via],
                       n = tapply(dataset$F_VICTIMES, dataset$D_TIPUS_VIA, length)[nom.via],
                       nom = factor(nom.viaComp, levels = nom.viaComp))

# Plots de les dades
ggplot(data = d.via.VM, aes(y = n, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
  ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Tipus de via") +
  scale_y_log10()

ggplot(data = d.via.VM, aes(y = victimes, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
  ggtitle("Victimes") + ylab("Mitja") + xlab("Tipus de via")

ggplot(data = d.via.VM, aes(y = morts, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
  ggtitle("Morts") + ylab("Mitja") + xlab("Tipus de via")

Analisi

# Conjubt de dades del tipus de via
d.via.all <- data.frame(victimes = dataset$F_VICTIMES,
                       morts = dataset$F_MORTS,
                       via = dataset$D_TIPUS_VIA)

# Preparacion pel test bootsrap no parametric
n.rep = 10000
d.via.c.V <- tapply(d.via.all$victimes, d.via.all$via, c)
d.via.c.M <- tapply(d.via.all$morts, d.via.all$via, c)

p.via.mean.V <- list()
p.via.mean.M <- list()
for(via in nom.via) {
  p.via.mean.V[[via]] = numeric(n.rep)
  p.via.mean.M[[via]] = numeric(n.rep)
}

# Fer el test
for(i in 1:n.rep) {
  t.via.mean.V <- lapply(lapply(d.via.c.V, sample, replace = T), mean)
  t.via.mean.M <- lapply(lapply(d.via.c.M, sample, replace = T), mean)
  for(via in nom.via) {
    p.via.mean.V[[via]][i] = t.via.mean.V[[via]]
    p.via.mean.M[[via]][i] = t.via.mean.M[[via]]
  }
}

# Calcular resultats
p.via.VM <- data.frame(victimes = c(unlist(lapply(p.via.mean.V, mean)))[nom.via],
                       morts = c(unlist(lapply(p.via.mean.M, mean)))[nom.via],
                       nom = factor(nom.viaComp, levels = nom.viaComp))

for(i in 1:length(nom.via)) {
  via = nom.via[i]
  p.via.VM$IC.max.V[i] = 2 * p.via.VM$victimes[i] - quantile(p.via.mean.V[[via]], 0.025)
  p.via.VM$IC.min.V[i] = 2 * p.via.VM$victimes[i] - quantile(p.via.mean.V[[via]], 0.975)
  p.via.VM$IC.max.M[i] = 2 * p.via.VM$morts[i] - quantile(p.via.mean.M[[via]], 0.025)
  p.via.VM$IC.min.M[i] = 2 * p.via.VM$morts[i] - quantile(p.via.mean.M[[via]], 0.975)
}

# Mostrar les dades
for(i in 1:length(nom.via)) {
  via = nom.via[i]
  cat('Via:', via, '\n')
  cat('  victimes:', p.via.VM$IC.min.V[i], p.via.VM$victimes[i], p.via.VM$IC.max.V[i], '\n')
  cat('  morts:', p.via.VM$IC.min.M[i], p.via.VM$morts[i], p.via.VM$IC.max.M[i], '\n')
}
## Via: Via urbana( inclou carrer i carrer residencial) 
##   victimes: 1.284365 1.300541 1.316261 
##   morts: 0.06750946 0.07338852 0.07908654 
## Via: Carretera convencional 
##   victimes: 1.797245 1.834217 1.869096 
##   morts: 0.20147 0.2133012 0.2249458 
## Via: Altres 
##   victimes: 1.396855 1.537785 1.651875 
##   morts: 0.134294 0.1765847 0.2146153 
## Via: Autovia 
##   victimes: 1.549435 1.726016 1.876275 
##   morts: 0.1300286 0.1689104 0.2057861 
## Via: Autopista 
##   victimes: 1.746406 1.972751 2.176271 
##   morts: 0.1573792 0.2257484 0.2886009 
## Via: Camí rural/pista forestal 
##   victimes: 1.155132 1.304516 1.424813 
##   morts: 0.07211489 0.1424404 0.1997745
# Plot dels resultats (Victimes)
ggplot(data = p.via.VM, aes(y = victimes, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
  geom_errorbar(aes(ymin = IC.min.V, ymax = IC.max.V), width=.2, position=position_dodge(.9)) +
  ggtitle("Victimes") + ylab("Mitja") + xlab("Tipus de via")

# Plot dels resultats (Morts)
ggplot(data = p.via.VM, aes(y = morts, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
  geom_errorbar(aes(ymin = IC.min.M, ymax = IC.max.M), width=.2, position=position_dodge(.9)) +
  ggtitle("Morts") + ylab("Mitja") + xlab("Tipus de via")

Tipus de Carril

# Vectors rellevants
nom.carril <- names(sort(tapply(dataset$F_VICTIMES, dataset$D_CARRIL_ESPECIAL, length), decreasing = TRUE))
nom.carril <- nom.carril[nom.carril != 'Sense Especificar']
nom.carrilComp <- gsub("/", "\n", gsub(" ", "\n", nom.carril))

# DataFrame de les dades importants
d.carril.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$D_CARRIL_ESPECIAL, mean)[nom.carril],
                       morts = tapply(dataset$F_MORTS, dataset$D_CARRIL_ESPECIAL, mean)[nom.carril],
                       n = tapply(dataset$F_VICTIMES, dataset$D_CARRIL_ESPECIAL, length)[nom.carril],
                       nom = factor(nom.carrilComp, levels = nom.carrilComp))

# Plots de les dades
ggplot(data = d.carril.VM, aes(y = n, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
  ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Tipus de carril") +
  scale_y_log10()

ggplot(data = d.carril.VM, aes(y = victimes, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
  ggtitle("Victimes") + ylab("Mitja") + xlab("Tipus de carril")

ggplot(data = d.carril.VM, aes(y = morts, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
  ggtitle("Morts") + ylab("Mitja") + xlab("Tipus de carril")

Analisi

# Conjunt de dades sobre el tipus de carril
d.carril.all <- data.frame(victimes = dataset$F_VICTIMES,
                       morts = dataset$F_MORTS,
                       carril = dataset$D_CARRIL_ESPECIAL)

# Preparacion pel test bootsrap no parametric
n.rep = 10000
d.carril.c.V <- tapply(d.carril.all$victimes, d.carril.all$carril, c)
d.carril.c.M <- tapply(d.carril.all$morts, d.carril.all$carril, c)

p.carril.mean.V <- list()
p.carril.mean.M <- list()
for(carril in nom.carril) {
  p.carril.mean.V[[carril]] = numeric(n.rep)
  p.carril.mean.M[[carril]] = numeric(n.rep)
}

# Fer el test
for(i in 1:n.rep) {
  t.carril.mean.V <- lapply(lapply(d.carril.c.V, sample, replace = T), mean)
  t.carril.mean.M <- lapply(lapply(d.carril.c.M, sample, replace = T), mean)
  for(carril in nom.carril) {
    p.carril.mean.V[[carril]][i] = t.carril.mean.V[[carril]]
    p.carril.mean.M[[carril]][i] = t.carril.mean.M[[carril]]
  }
}

# Calcular rresultats
p.carril.VM <- data.frame(victimes = c(unlist(lapply(p.carril.mean.V, mean)))[nom.carril],
                       morts = c(unlist(lapply(p.carril.mean.M, mean)))[nom.carril],
                       nom = factor(nom.carrilComp, levels = nom.carrilComp))

for(i in 1:length(nom.carril)) {
  carril = nom.carril[i]
  p.carril.VM$IC.max.V[i] = 2 * d.carril.VM$victimes[i] - quantile(p.carril.mean.V[[carril]], 0.025)
  p.carril.VM$IC.min.V[i] = max(2 * d.carril.VM$victimes[i] - quantile(p.carril.mean.V[[carril]], 0.975), 0)
  p.carril.VM$IC.max.M[i] = 2 * d.carril.VM$morts[i] - quantile(p.carril.mean.M[[carril]], 0.025)
  p.carril.VM$IC.min.M[i] = max(2 * d.carril.VM$morts[i] - quantile(p.carril.mean.M[[carril]], 0.975), 0)
}

# Mostrar les dades
for(i in 1:length(nom.carril)) {
  carril = nom.carril[i]
  cat('Carril:', carril, '\n')
  cat('  victimes:', p.carril.VM$IC.min.V[i], p.carril.VM$victimes[i], p.carril.VM$IC.max.V[i], '\n')
  cat('  morts:', p.carril.VM$IC.min.M[i], p.carril.VM$morts[i], p.carril.VM$IC.max.M[i], '\n')
}
## Carril: No n'hi ha 
##   victimes: 1.533443 1.553491 1.572363 
##   morts: 0.1391188 0.1458223 0.1525367 
## Carril: Carril bus 
##   victimes: 1.211712 1.372667 1.500113 
##   morts: 0.05855856 0.1077153 0.1486486 
## Carril: Carril bici 
##   victimes: 1.166667 1.276782 1.373016 
##   morts: 0.02380952 0.07922063 0.1269841 
## Carril: Altres 
##   victimes: 1.232 1.526413 1.744 
##   morts: 0.088 0.1518584 0.208 
## Carril: Carril acceleració 
##   victimes: 1.475806 1.759371 2.008065 
##   morts: 0.09677419 0.1694806 0.233871 
## Carril: Carril central 
##   victimes: 1.559322 1.863864 2.127119 
##   morts: 0.08474576 0.1703822 0.2457627 
## Carril: Carril d'alentiment 
##   victimes: 1.5 1.826618 2.108108 
##   morts: 0.02702703 0.1079459 0.1756757 
## Carril: Carril lent 
##   victimes: 1.8 2.217663 2.583333 
##   morts: 0.1333333 0.2329983 0.3333333 
## Carril: Carril avançament 
##   victimes: 1.861111 2.442481 2.972222 
##   morts: 0.2777778 0.4710639 0.6666667 
## Carril: Habilitació voral/carril addicional 
##   victimes: 1.102586 1.448055 1.758621 
##   morts: 0.06896552 0.2404655 0.3793103 
## Carril: Carril reversible 
##   victimes: 1.292647 2.1784 2.882353 
##   morts: 0 0.05867647 0.1176471 
## Carril: Carril habilitat en sentit contrari habitual 
##   victimes: 1 1.753808 2.333333 
##   morts: 0 0.16575 0.3333333
#Plot dels resultats (Victimes)
ggplot(data = p.carril.VM, aes(y = victimes, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
  geom_errorbar(aes(ymin = IC.min.V, ymax = IC.max.V), width=.2, position=position_dodge(.9)) +
  ggtitle("Victimes") + ylab("Mitja") + xlab("Tipus de carril")

#Plot dels resultats (Morts)
ggplot(data = p.carril.VM, aes(y = morts, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
  geom_errorbar(aes(ymin = IC.min.M, ymax = IC.max.M), width=.2, position=position_dodge(.9)) +
  ggtitle("Morts") + ylab("Mitja") + xlab("Tipus de carril")

Altres factors

Velocitat màxima

# Vectors rellevats
val.vel <- levels(factor(dataset$C_VELOCITAT_VIA))
val.vel10 <- seq(10, 120, 10)
nom.vel <- paste(val.vel, 'km/h')

# Conjunt de dades sobre la velocitat
d.vel.all <- data.frame(victimes = dataset$F_VICTIMES[!is.na(dataset$C_VELOCITAT_VIA)],
                       morts = dataset$F_MORTS[!is.na(dataset$C_VELOCITAT_VIA)],
                       velocitat = dataset$C_VELOCITAT_VIA[!is.na(dataset$C_VELOCITAT_VIA)],
                       vel10 = factor(dataset$C_VELOCITAT_VIA[!is.na(dataset$C_VELOCITAT_VIA)] %/% 10 * 10, levels = val.vel10))
d.vel.all$MpV100 <- d.vel.all$morts / d.vel.all$victimes * 100

# DtaFrame de les dades importants
d.vel.VM <- data.frame(victimes = tapply(d.vel.all$victimes, d.vel.all$velocitat, mean)[val.vel],
                       morts = tapply(d.vel.all$morts, d.vel.all$velocitat, mean)[val.vel],
                       MpV100 = tapply(d.vel.all$MpV100, d.vel.all$velocitat, mean)[val.vel],
                       n = tapply(d.vel.all$victimes, d.vel.all$velocitat, length)[val.vel],
                       nom = factor(nom.vel, levels = nom.vel))

#Plot de les dades
ggplot(data = d.vel.VM, aes(y = n, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
  ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Velocitat màxima") +
  scale_y_continuous(trans = 'log10')

ggplot(data = d.vel.VM, aes(y = victimes, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
  ggtitle("Victimes") + ylab("Mitja") + xlab("Velocitat màxima")

ggplot(data = d.vel.VM, aes(y = morts, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
  ggtitle("Morts") + ylab("Mitja") + xlab("Velocitat màxima")

ggplot(data = d.vel.VM, aes(y = MpV100, x = nom)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
  ggtitle("Percentatge de mortalitat de les victimes") + ylab("Percentatge de víctimes mortes") + xlab("Velocitat màxima")

Analisi

# Calcul de les correlacion originals
d.vel.pearson.V = cor(d.vel.all$victimes, d.vel.all$velocitat, method = 'pearson')
d.vel.spearman.V = cor(d.vel.all$victimes, d.vel.all$velocitat, method = 'spearman')
d.vel.pearson.M = cor(d.vel.all$morts, d.vel.all$velocitat, method = 'pearson')
d.vel.spearman.M = cor(d.vel.all$morts, d.vel.all$velocitat, method = 'spearman')
d.vel.pearson.MpV100 = cor(d.vel.all$MpV100, d.vel.all$velocitat, method = 'pearson')
d.vel.spearman.MpV100 = cor(d.vel.all$MpV100, d.vel.all$velocitat, method = 'spearman')

#Preparació de test permutacional
n.rep <- 3000
p.vel.pearson.V <- numeric(n.rep)
p.vel.spearman.V <- numeric(n.rep)
p.vel.pearson.M <- numeric(n.rep)
p.vel.spearman.M <-numeric(n.rep)
p.vel.pearson.MpV100 <- numeric(n.rep)
p.vel.spearman.MpV100 <-numeric(n.rep)

# Fer permutacions
for(i in 1:n.rep) {
  p.vel <- sample(d.vel.all$velocitat)
  p.vel.pearson.V[i] = cor(d.vel.all$victimes, p.vel, method = 'pearson')
  p.vel.spearman.V[i] = cor(d.vel.all$victimes, p.vel, method = 'spearman')
  p.vel.pearson.M[i] = cor(d.vel.all$morts, p.vel, method = 'pearson')
  p.vel.spearman.M[i] = cor(d.vel.all$morts, p.vel, method = 'spearman')
  p.vel.pearson.MpV100[i] = cor(d.vel.all$MpV100, p.vel, method = 'pearson')
  p.vel.spearman.MpV100[i] = cor(d.vel.all$MpV100, p.vel, method = 'spearman')
}

# Plots de Victimes
plot.vel.limit.pearson.V <- c(min(p.vel.pearson.V) * 1.1,
                              max(c(p.vel.pearson.V, d.vel.pearson.V) * 1.1))
hist(p.vel.pearson.V, xlim = plot.vel.limit.pearson.V, main = 'Correlació entre velocitat i víctimes (pearson)')
p.value.vel.pearson.V <- sum(p.vel.pearson.V >= d.vel.pearson.V) / n.rep
lines(rep(d.vel.pearson.V, 2), c(0, n.rep), col = 'red')

plot.vel.limit.spearman.V <- c(min(p.vel.spearman.V) * 1.1,
                               max(c(p.vel.spearman.V, d.vel.spearman.V) * 1.1))
hist(p.vel.spearman.V, xlim = plot.vel.limit.spearman.V, main = 'Correlació entre velocitat i víctimes (spearman)')
p.value.vel.spearman.V <- sum(p.vel.spearman.V >= d.vel.spearman.V) / n.rep
lines(rep(d.vel.spearman.V, 2), c(0, n.rep), col = 'red')

# Plots de Morts
plot.vel.limit.pearson.M <- c(min(p.vel.pearson.M) * 1.1,
                              max(c(p.vel.pearson.M, d.vel.pearson.M) * 1.1))
hist(p.vel.pearson.M, xlim = plot.vel.limit.pearson.M, main = 'Correlació entre velocitat i morts (pearson)')
p.value.vel.pearson.M <- sum(p.vel.pearson.M >= d.vel.pearson.M) / n.rep
lines(rep(d.vel.pearson.M, 2), c(0, n.rep), col = 'red')

plot.vel.limit.spearman.M <- c(min(p.vel.spearman.M) * 1.1,
                               max(c(p.vel.spearman.M, d.vel.spearman.M) * 1.1))
hist(p.vel.spearman.M, xlim = plot.vel.limit.spearman.M, main = 'Correlació entre velocitat i morts (spearman)')
p.value.vel.spearman.M <- sum(p.vel.spearman.M >= d.vel.spearman.M) / n.rep
lines(rep(d.vel.spearman.M, 2), c(0, n.rep), col = 'red')

# Plots de Mortalitat
plot.vel.limit.pearson.MpV100 <- c(min(p.vel.pearson.MpV100) * 1.1,
                              max(c(p.vel.pearson.MpV100, d.vel.pearson.MpV100) * 1.1))
hist(p.vel.pearson.MpV100, xlim = plot.vel.limit.pearson.MpV100, main = 'Correlació entre velocitat i percentatge de víctimes mortes (pearson)')
p.value.vel.pearson.MpV100 <- sum(p.vel.pearson.MpV100 >= d.vel.pearson.MpV100) / n.rep
lines(rep(d.vel.pearson.MpV100, 2), c(0, n.rep), col = 'red')

plot.vel.limit.spearman.MpV100 <- c(min(p.vel.spearman.MpV100) * 1.1,
                               max(c(p.vel.spearman.MpV100, d.vel.spearman.MpV100) * 1.1))
hist(p.vel.spearman.MpV100, xlim = plot.vel.limit.spearman.MpV100, main = 'Correlació entre velocitat i percentatge de víctimes mortes (spearman)')
p.value.vel.spearman.MpV100 <- sum(p.vel.spearman.MpV100 >= d.vel.spearman.MpV100) / n.rep
lines(rep(d.vel.spearman.MpV100, 2), c(0, n.rep), col = 'red')

# Mostrar les dades
{
  cat('Victimes:\n')
  cat('  Pearson: ', d.vel.pearson.V, ',p-value:', p.value.vel.pearson.V, '\n')
  cat('  Spearman: ', d.vel.spearman.V, ',p-value:', p.value.vel.spearman.V, '\n')
  cat('Morts:\n')
  cat('  Pearson: ', d.vel.pearson.M, ',p-value:', p.value.vel.pearson.M, '\n')
  cat('  Spearman: ', d.vel.spearman.M, ',p-value:', p.value.vel.spearman.M, '\n')
  cat('Mortalitat:\n')
  cat('  Pearson: ', d.vel.pearson.MpV100, ',p-value:', p.value.vel.pearson.MpV100, '\n')
  cat('  Spearman: ', d.vel.spearman.MpV100, ',p-value:', p.value.vel.spearman.MpV100, '\n')
}
## Victimes:
##   Pearson:  0.04008627 ,p-value: 0 
##   Spearman:  0.01377029 ,p-value: 0.057 
## Morts:
##   Pearson:  0.04403149 ,p-value: 0 
##   Spearman:  0.0318193 ,p-value: 0.001 
## Mortalitat:
##   Pearson:  0.0351944 ,p-value: 0 
##   Spearman:  0.03048427 ,p-value: 0.001
# Calcular pendents
d.vel.fit.V <- lm(victimes~velocitat, data = d.vel.all)
d.vel.fit.M <- lm(morts~velocitat, data = d.vel.all)
d.vel.fit.MpV100 <- lm(MpV100~velocitat, data = d.vel.all)

d.vel.10.VM <- data.frame(victimes = tapply(d.vel.all$victimes, d.vel.all$vel10, mean),
                          morts = tapply(d.vel.all$morts, d.vel.all$vel10, mean),
                          MpV100 = tapply(d.vel.all$MpV100, d.vel.all$vel10, mean),
                          n = tapply(d.vel.all$victimes, d.vel.all$vel10, length),
                          vel10 = val.vel10)

# Plots de les pendents (Victimes)
ggplot(data = d.vel.10.VM, aes(y = victimes, x = vel10)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
  ggtitle("Víctimes") + ylab("Mitja") + xlab("Velocitat màxima") +
  geom_abline(intercept = summary(d.vel.fit.V)$coefficient[1],
              slope = summary(d.vel.fit.V)$coefficient[2], color = 'red')

# Plots de les pendents (Morts)
ggplot(data = d.vel.10.VM, aes(y = morts, x = vel10)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
  ggtitle("Morts") + ylab("Mitja") + xlab("Velocitat màxima") +
  geom_abline(intercept = summary(d.vel.fit.M)$coefficient[1],
              slope = summary(d.vel.fit.M)$coefficient[2], color = 'red')

# Plots de les pendents (Mortalitat)
ggplot(data = d.vel.10.VM, aes(y = MpV100, x = vel10)) +
  geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
  ggtitle("Percentatge de mortalitat de les victimes") + ylab("Percentatge de víctimes mortes") + xlab("Velocitat màxima") +
  geom_abline(intercept = summary(d.vel.fit.MpV100)$coefficient[1],
              slope = summary(d.vel.fit.MpV100)$coefficient[2], color = 'red')

# Calcular intervals de confiança per les pendents
d.vel.IC.V <- summary(d.vel.fit.V)$coefficient[2] + c(-1.96, 1.96) * summary(d.vel.fit.V)$coefficient[4]
d.vel.IC.M <- summary(d.vel.fit.M)$coefficient[2] + c(-1.96, 1.96) * summary(d.vel.fit.M)$coefficient[4]
d.vel.IC.MpV100 <- summary(d.vel.fit.MpV100)$coefficient[2] + c(-1.96, 1.96) * summary(d.vel.fit.MpV100)$coefficient[4]

# Mostrar IdC
{
  show(d.vel.IC.V)
  show(d.vel.IC.M)
  show(d.vel.IC.MpV100)
}
## [1] 0.001247525 0.003019904
## [1] 0.0004982629 0.0011039999
## [1] 0.02220343 0.06208294

Visibilitat

# Conjunt de dades sobre la visibilitat
d.visib.all <- data.frame(victimes = dataset$F_VICTIMES, morts = dataset$F_MORTS,
                         boira = dataset$D_BOIRA,
                         nit = dataset$grupHor,
                         llum = dataset$D_LLUMINOSITAT)

levels(d.visib.all$boira) <- c("No n'hi ha", 'Si')
levels(d.visib.all$nit) <- c('Dia', 'Nit', 'Dia')
levels(d.visib.all$llum) <- c('Reduida', 'Bona', 'Reduida', 'Reduida', 'Bona', 'Cap', 'NA')

d.visib.mean.Total <- c(tapply(d.visib.all$victimes, d.visib.all$boira, length),
                        tapply(d.visib.all$victimes, d.visib.all$nit, length),
                        tapply(d.visib.all$victimes, d.visib.all$llum, length))
d.visib.mean.V <- c(tapply(d.visib.all$victimes, d.visib.all$boira, mean),
                    tapply(d.visib.all$victimes, d.visib.all$nit, mean),
                    tapply(d.visib.all$victimes, d.visib.all$llum, mean))
d.visib.mean.M <- c(tapply(d.visib.all$morts, d.visib.all$boira, mean),
                    tapply(d.visib.all$morts, d.visib.all$nit, mean),
                    tapply(d.visib.all$morts, d.visib.all$llum, mean))

d.visib.mean.Total <- d.visib.mean.Total[names(d.visib.mean.Total) != 'NA']
d.visib.mean.V <- d.visib.mean.V[names(d.visib.mean.V) != 'NA']
d.visib.mean.M <- d.visib.mean.M[names(d.visib.mean.M) != 'NA']

d.visib.type <- factor(c(rep('boira', sum(levels(d.visib.all$boira) != 'NA')),
                         rep('nit', sum(levels(d.visib.all$nit) != 'NA')),
                         rep('iluminació', sum(levels(d.visib.all$llum) != 'NA'))),
                       levels = c('boira', 'nit', 'iluminació'))
d.visib.class <- factor(names(d.visib.mean.V),
                        levels = c('Si', "No n'hi ha", 'Dia', 'Nit', 'Bona', 'Reduida', 'Cap'))
d.visib.opt <- levels(d.visib.class)

# DataFrame de les dades mési importants sobre la visibilitat
d.visib.VM <- data.frame(victimes = d.visib.mean.V,
                         morts = d.visib.mean.M,
                         n = d.visib.mean.Total,
                         class = d.visib.class,
                         type = d.visib.type)

# Plot de les dades (Victimes)
ggplot(d.visib.VM, aes(y = victimes, x = type, fill = class)) +
    geom_bar(stat = "identity", position = "dodge") + 
    ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Factor de visibilitat")

# Plot de les dades (Morts)
ggplot(d.visib.VM, aes(y = morts, x = type, fill = class)) +
    geom_bar(stat = "identity", position = "dodge") + 
    ggtitle("Morts per accident") + ylab("Mitja") + xlab("Factor de visibilitat")

Analisi

# Preparació per fer bootstrap
n.rep <- 5000
bst.visib.mean.V <- data.frame(i = 1:n.rep)
bst.visib.mean.M <- data.frame(i = 1:n.rep)

# Bucle de les imulacions de boostrap
for(i in 1:n.rep) {
  for(j in 1:7) {
    bst.visib.mean.V[[d.visib.opt[d.visib.class[j]]]][i] = mean(rpois(d.visib.VM$n[j], d.visib.VM$victimes[j]))
    bst.visib.mean.M[[d.visib.opt[d.visib.class[j]]]][i] = mean(rpois(d.visib.VM$n[j], d.visib.VM$morts[j]))
  }
}

bst.visib.VM <- data.frame(victimes = c(unlist(lapply(bst.visib.mean.V, mean)))[d.visib.opt],
                           IC.min.V = c(unlist(lapply(bst.visib.mean.V, quantile, 0.025)))[paste(d.visib.opt, '2.5%', sep = '.')],
                           IC.max.V = c(unlist(lapply(bst.visib.mean.V, quantile, 0.975)))[paste(d.visib.opt, '97.5%', sep = '.')],
                           morts = c(unlist(lapply(bst.visib.mean.M, mean)))[d.visib.opt],
                           IC.min.M = c(unlist(lapply(bst.visib.mean.M, quantile, 0.025)))[paste(d.visib.opt, '2.5%', sep = '.')],
                           IC.max.M = c(unlist(lapply(bst.visib.mean.M, quantile, 0.975)))[paste(d.visib.opt, '97.5%', sep = '.')],
                           class = factor(d.visib.opt, levels = d.visib.opt),
                           type = d.visib.type)

# Mostrar les dades
for(i in 1:length(d.visib.opt)) {
  opt = d.visib.opt[i]
  type = levels(bst.visib.VM$type)[bst.visib.VM$type[i]]
  cat(type, ':', opt, '\n')
  cat('  victimes:', bst.visib.VM$IC.min.V[i], bst.visib.VM$victimes[i], bst.visib.VM$IC.max.V[i], '\n')
  cat('  morts:', bst.visib.VM$IC.min.M[i], bst.visib.VM$morts[i], bst.visib.VM$IC.max.M[i], '\n')
}
## boira : Si 
##   victimes: 1.409543 1.515593 1.624254 
##   morts: 0.07554672 0.1017256 0.1312127 
## boira : No n'hi ha 
##   victimes: 1.532481 1.551747 1.571262 
##   morts: 0.1353328 0.1410869 0.1469486 
## nit : Dia 
##   victimes: 1.504164 1.524375 1.544377 
##   morts: 0.1230113 0.1287789 0.1344526 
## nit : Nit 
##   victimes: 1.686458 1.743595 1.800799 
##   morts: 0.2011982 0.2215533 0.2421368 
## iluminació : Bona 
##   victimes: 1.466888 1.487408 1.507912 
##   morts: 0.1130081 0.1187576 0.1245381 
## iluminació : Reduida 
##   victimes: 1.659554 1.715318 1.771581 
##   morts: 0.1639185 0.182392 0.2007759 
## iluminació : Cap 
##   victimes: 1.913265 1.992344 2.07568 
##   morts: 0.2789116 0.3095551 0.3409864
# Plot dels resultats (Victimes)
ggplot(data = bst.visib.VM, aes(y = victimes, x = type, fill = class)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = IC.min.V, ymax = IC.max.V), width=.2, position=position_dodge(.9)) +
  ggtitle("Victimes") + ylab("Mitja") + xlab("Tipus de carril")

# Plot dels resultats (Morts)
ggplot(data = bst.visib.VM, aes(y = morts, x = type, fill = class)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = IC.min.M, ymax = IC.max.M), width=.2, position=position_dodge(.9)) +
  ggtitle("Morts") + ylab("Mitja") + xlab("Tipus de carril")

Estat del terreny

# Vectors rellevants
nom.superficie <- names(sort(tapply(dataset$F_VICTIMES, dataset$D_SUPERFICIE, length), decreasing = TRUE))
nom.superficie <- nom.superficie[nom.superficie != 'Sense especificar']

# DataFrame de les dades importants
d.sup.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$D_SUPERFICIE, mean)[nom.superficie],
                        morts = tapply(dataset$F_MORTS, dataset$D_SUPERFICIE, mean)[nom.superficie],
                        n = tapply(dataset$F_VICTIMES, dataset$D_SUPERFICIE, length)[nom.superficie],
                        nom = factor(nom.superficie, levels = nom.superficie),
                        color = 'red')

# Plot de les dades
ggplot(data = d.sup.VM, aes(x = nom, y = n), col) +
  geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') + 
  ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Temporal") +
  scale_y_continuous(trans = 'log10')

ggplot(data = d.sup.VM, aes(x = nom, y = victimes), col) +
  geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') + 
  ggtitle("Histograma de numero d\'accidents") + ylab("Victimes") + xlab("Temporal")

ggplot(data = d.sup.VM, aes(x = nom, y = morts), col) +
  geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') + 
  ggtitle("Histograma de numero d\'accidents") + ylab("Morts") + xlab("Temporal")

Analisi

# Declaracio de variables rellevants
d.sup.type.n <- 1:length(nom.superficie)
d.sup.n <- sum(d.sup.VM$n)

#Conjunt de dades sobre la superficie
d.sup.all <- data.frame(victimes = dataset$F_VICTIMES[dataset$D_SUPERFICIE != 'Sense especificar'],
                        morts = dataset$F_MORTS[dataset$D_SUPERFICIE != 'Sense especificar'],
                        superficie = dataset$D_SUPERFICIE[dataset$D_SUPERFICIE != 'Sense especificar'])

# Preparació per fer non parametric boostrap
n.rep <- 10000
p.sup.V <- matrix(nrow = length(nom.superficie), ncol = n.rep)
p.sup.M <- matrix(nrow = length(nom.superficie), ncol = n.rep)

# Bucle de les permutacions
for(i in 1:n.rep) {
  perm <- sample(d.sup.n, replace = T)
  p.sup.V[, i] <- resample.means(d.sup.all$victimes, d.sup.VM$n, perm)
  p.sup.M[, i] <- resample.means(d.sup.all$morts, d.sup.VM$n, perm)
}

# Calcular els resultats
dp.sup.VM <- data.frame(victimes = d.sup.VM$victimes,
                        morts = d.sup.VM$morts,
                        nom = factor(nom.superficie, levels = nom.superficie))

for(i in d.sup.type.n) {
    dp.sup.VM$IC.max.V[i] = 2 * mean(p.sup.V[i, ]) - quantile(p.sup.V[i, ], 0.025)
    dp.sup.VM$IC.min.V[i] = max(2 * mean(p.sup.V[i, ]) - quantile(p.sup.V[i, ], 0.975), 0)
    dp.sup.VM$IC.max.M[i] = 2 * mean(p.sup.M[i, ]) - quantile(p.sup.M[i, ], 0.025)
    dp.sup.VM$IC.min.M[i] = max(2 * mean(p.sup.M[i, ]) - quantile(p.sup.M[i, ], 0.975), 0)
}

# Mostrar les dades
for(i in 1:length(nom.superficie)) {
  terreny = nom.superficie[i]
  cat('Terreny:', terreny, '\n')
  cat('  victimes:', dp.sup.VM$victimes[i], ', interval de confiança', dp.sup.VM$IC.min.V[i], dp.sup.VM$IC.max.V[i], '\n')
  cat('  morts:', dp.sup.VM$morts[i], ', interval de confiança', dp.sup.VM$IC.min.M[i], dp.sup.VM$IC.max.M[i], '\n')
}
## Terreny: Sec i net 
##   victimes: 1.553383 , interval de confiança 1.531309 1.569086 
##   morts: 0.1405058 , interval de confiança 0.1335297 0.1462685 
## Terreny: Mullat 
##   victimes: 1.591631 , interval de confiança 1.451427 1.636132 
##   morts: 0.1385281 , interval de confiança 0.1081475 0.1687535 
## Terreny: Relliscós 
##   victimes: 1.231481 , interval de confiança 1.293381 1.747085 
##   morts: 0.08333333 , interval de confiança 0.05653148 0.2046796 
## Terreny: Inundat 
##   victimes: 1.193182 , interval de confiança 1.269014 1.769014 
##   morts: 0.1136364 , interval de confiança 0.04182045 0.212275 
## Terreny: Gelat 
##   victimes: 1.4 , interval de confiança 0.8368267 1.97016 
##   morts: 0.2666667 , interval de confiança 0 0.2783067 
## Terreny: Nevat 
##   victimes: 1.444444 , interval de confiança 0.6641778 2.108622 
##   morts: 0 , interval de confiança 0 0.2749556
# Plot dels resultats (Victimes)
ggplot(data = dp.sup.VM, aes(x = nom, y = victimes), col) +
  geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') + 
  geom_errorbar(aes(ymin = IC.min.V, ymax = IC.max.V), width=.2, position=position_dodge(.9)) +
  ggtitle("Histograma de numero d\'accidents") + ylab("Victimes") + xlab("Temporal")

# Plot dels resultats (Morts)
ggplot(data = dp.sup.VM, aes(x = nom, y = morts), col) +
  geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') + 
  geom_errorbar(aes(ymin = IC.min.M, ymax = IC.max.M), width=.2, position=position_dodge(.9)) +
  ggtitle("Histograma de numero d\'accidents") + ylab("Morts") + xlab("Temporal")

Altres gràfiques d’interès

Distribució dels minuts

Gràfiques de visibilitat amb detall